/* This program creates a dataset of Call Report Data.  It is the first step in constructing the bank health measures used
in Cooper and Peek's RESTAT paper. */

/*
NOTE: This code will not run as-is because, as discussed in the master do file and bhreadme.docx, we are unable to provide the 
raw data. Public versions of the data can be obtained for much of the sample period, and this first step 
will differ depending on where your raw data come from and how you want to put them together.

This do file outlines our approach with our data:
1) Opens each quarterly call report file that contains all possible call report variables 
2) Keeps only the variables needed for our analysis
3) Saves one (trimmed) dataset (.dta file) per quarter
4) Appends these quarterly datasets to generate a large time-series dataset (consistent with our sample horizon).

If you just want to append your files without selecting relevant variables first, skip to the section marked **Append datasets** below
*/

* User configuration: fill in every setting below before running.
* Directories are joined to file names with "/" later in this file (e.g. "`call_dir'/d200003.dta"),
* so they should be given without a trailing slash.
local call_dir "" 		//directory with raw quarterly call report data files (ours are in .dta format, named dYYYYQQ.dta)
local raw2020_dir "" 	//directory where you want to save the trimmed quarterly files (this step can be skipped)
local int2020_dir "" 	//directory where you want to save the overall (appended) time series dataset


*############CREATE CALL - BEGIN##########################################
*1) - Select time span of the dataset
* year_start and year_end are intentionally left without a value: a four-digit year must be typed
* after the "=" on each line, otherwise the forvalues loops below cannot be evaluated.
local year_start 		=  			//First year of data - numeric year - YYYY
local year_end 			= 			//Last year of data - numeric year - YYYY
local first_quarters	= "" 		//space-separated two-digit quarter-end months (same format as `all_quarters' below) available in your first year of data
local current_quarters	= "" 		//space-separated two-digit quarter-end months (same format as `all_quarters' below) available in your last year of data
* Quarter-end months used for every year strictly between year_start and year_end.
local all_quarters 		= "03 06 09 12"
 
**************************************************************************
*2) - Define variables to extract
**************************************************************************
* The bank identifier in our raw files is called entity.
* NOTE(review): entity itself is not part of the list below, so the -keep- in the
* trimming loop would drop it -- confirm whether it should be listed here.
*
* The list is written with /// line continuation; it is the same set of
* 46 variable names, in the same order, as before.
local keep_variables ///
date_8 date_sas ///
rcfd1246 rcfd1247 rcfd1249 rcfd1250 rcfd1403 rcfd1407 rcfd1410 rcfd1422 rcfd1423 rcfd2008 rcfd2122 ///
rcfd2123 rcfd2143 rcfd2150 rcfd2170 rcfd3163 rcfd3164 rcfd3165 rcfd3210 rcfd5506 rcfd5507 rcfd8274 ///
rcfda223 rcfdb026 rcfdb538 rcfn2200 rcfnb573 rcfnb574 rcon0296 rcon1211 rcon1212 rcon1410 rcon2008 ///
rcon3259 rcon9804 rssd9048 rssd9050 rssd9061 rssd9200 rssd9348 rssd9360 rssd9421 rssd9425 rssd9950

* Echo the list to the log so the extracted variables are recorded.
display "`keep_variables'"

clear

/* Trim each quarterly call report file down to `keep_variables'.

For every year from `year_start' to `year_end' and every quarter available in that year, this loop:
  - opens "`call_dir'/dYYYYQQ.dta",
  - lower-cases all variable names,
  - adds an all-missing placeholder for every requested variable the file lacks
    (mnemonics change over time, so not every quarter has every variable),
  - keeps only `keep_variables', drops exact duplicate observations, and
  - saves the result under the same file name in `raw2020_dir'.

Quarters used: `first_quarters' in the first year, `current_quarters' in the last year,
`all_quarters' otherwise. If year_start equals year_end, only `first_quarters' is used.

Files are always opened and saved through their full path, so the working directory is
left untouched here (the previous unquoted -cd- calls failed on directories containing spaces).
*/

* isvar is a user-written command; stop with a clear message if it is not installed.
capture which isvar
if _rc {
	display as error "This do file requires the user-written command isvar. Install it with: ssc install isvar"
	exit 199
}

forvalues yy = `year_start'(1)`year_end' {
	* Pick the list of quarters that exist for this year.
	if `yy' == `year_start' {
		local z `first_quarters'
	}
	else if `yy' == `year_end' {
		local z `current_quarters'
	}
	else {
		local z "`all_quarters'"
	}
	foreach qq in `z' {
		local dd "d`yy'`qq'.dta"
		display "`dd'"
		use "`call_dir'/`dd'", clear
		*------------------------------
		* Add an empty variable if this vintage of data does not have all the variables you want to extract
		* "append" will not properly join two dataset with different sets of variables
		*------------------------------
		ren *, lower
		* isvar returns in r(badlist) the requested names that are not variables in this file.
		isvar `keep_variables'
		local missing "`r(badlist)'"  //missing here refers to a missing variable in a given quarterly dataset
		display "`missing'"
		foreach x in `missing' {
			display "`x'"
			* NOTE(review): the placeholder is numeric; if `x' is a string variable in other
			* quarters, the later -append, force- will not combine the two types -- confirm.
			gen `x' = .
		}
		keep `keep_variables'
		duplicates drop
		save "`raw2020_dir'/`dd'", replace
	}
}

* Start from an empty dataset so the first -append- simply loads the first quarter.
clear



**Append datasets
* Stacks the trimmed quarterly files in `raw2020_dir' into one time-series dataset,
* walking through the same year/quarter grid as the trimming step above:
* `first_quarters' in the first year, `current_quarters' in the last year,
* `all_quarters' in every other year.

* The working directory is switched to the output folder once, up front. The path is
* quoted so that directories containing spaces work.
cd "`int2020_dir'"

forvalues yy = `year_start'(1)`year_end' {
	if `yy' == `year_start' {
		local z `first_quarters'
	}
	else if `yy' == `year_end' {
		local z `current_quarters'
	}
	else {
		* Use the shared quarter list rather than repeating the literal, so the
		* trimming and appending steps cannot drift apart.
		local z "`all_quarters'"
	}
	foreach qq in `z' {
		local dd "d`yy'`qq'.dta"

		di "`dd'"
		* NOTE(review): -force- lets string and numeric versions of the same variable be
		* appended, but the mismatched values are set to missing, and -quietly- hides the
		* accompanying notes. Check variable types if a series looks unexpectedly empty.
		quietly append using "`raw2020_dir'/`dd'", force
	}
}


** Save time series dataset.
*##########################################################
save "`int2020_dir'/bh-01a-callAppended", replace
*##########################################################
		















